# Common config file for Nehalem core

[general]
enable_icache_modeling = true

[perf_model/core]
logical_cpus = 1 # number of SMT threads per core
type = interval
core_model = nehalem

[perf_model/core/interval_timer]
dispatch_width = 4
window_size = 128
num_outstanding_loadstores = 10

[perf_model/sync]
reschedule_cost = 1000

[caching_protocol]
type = parametric_dram_directory_msi

[perf_model/branch_predictor]
type = pentium_m
mispredict_penalty=8 # Reflects just the front-end portion (approx) of the penalty for Interval Simulation

[perf_model/tlb]
penalty = 30          # Page walk penalty in cycles

[perf_model/itlb]
size = 128            # Number of I-TLB entries
associativity = 4     # I-TLB associativity

[perf_model/dtlb]
size = 64             # Number of D-TLB entries
associativity = 4     # D-TLB associativity

[perf_model/stlb]
size = 512            # Number of second-level TLB entries
associativity = 4     # S-TLB associativity

[perf_model/cache]
levels = 3

[perf_model/l1_icache]
perfect = false
cache_size = 32
associativity = 4
address_hash = mask
replacement_policy = lru
data_access_time = 4
tags_access_time = 1
perf_model_type = parallel
writethrough = 0
shared_cores = 1

[perf_model/l1_dcache]
perfect = false
cache_size = 32
associativity = 8
address_hash = mask
replacement_policy = lru
data_access_time = 4
tags_access_time = 1
perf_model_type = parallel
writethrough = 0
shared_cores = 1

[perf_model/l2_cache]
perfect = false
cache_size = 256
associativity = 8
address_hash = mask
replacement_policy = lru
data_access_time = 8 # 8.something according to membench, -1 cycle L1 tag access time
# http://www.realworldtech.com/page.cfm?ArticleID=RWT040208182719&p=7
tags_access_time = 3
# Total neighbor L1/L2 access time is around 40/70 cycles (60-70 when it's coming out of L1)
writeback_time = 50 # L3 hit time will be added
perf_model_type = parallel
writethrough = 0
shared_cores = 1

[perf_model/l3_cache]
cache_block_size = 64
address_hash = mask
dvfs_domain = global # L1 and L2 run at core frequency (default), L3 is system frequency
prefetcher = none
writeback_time = 0

[clock_skew_minimization]
scheme = barrier

[clock_skew_minimization/barrier]
quantum = 100

[dvfs]
transition_latency = 2000 # In ns, "under 2 microseconds" according to http://download.intel.com/design/intarch/papers/323671.pdf (page 8)

[dvfs/simple]
cores_per_socket = 1

[power]
vdd = 1.2 # Volts
technology_node = 45 # nm
